1
2
3
4
5
6
7
8
9
10
11 """Restriction Enzyme classes.
12
Notes about the various classes of the restriction enzyme implementation::
14
15 RestrictionType is the type of all restriction enzymes.
16 ----------------------------------------------------------------------------
17 AbstractCut implements some methods that are common to all enzymes.
18 ----------------------------------------------------------------------------
19 NoCut, OneCut,TwoCuts represent the number of double strand cuts
20 produced by the enzyme.
21 they correspond to the 4th field of the rebase
22 record emboss_e.NNN.
0->NoCut : the enzyme is not characterised.
2->OneCut : the enzyme produces one double strand cut.
4->TwoCuts : the enzyme produces two double strand cuts.
26 ----------------------------------------------------------------------------
27 Meth_Dep, Meth_Undep represent the methylation susceptibility to
28 the enzyme.
29 Not implemented yet.
30 ----------------------------------------------------------------------------
31 Palindromic, if the site is palindromic or not.
32 NotPalindromic allow some optimisations of the code.
33 No need to check the reverse strand
34 with palindromic sites.
35 ----------------------------------------------------------------------------
36 Unknown, Blunt, represent the overhang.
Ov5, Ov3 Unknown is here for symmetry reasons and
corresponds to enzymes that are not characterised
39 in rebase.
40 ----------------------------------------------------------------------------
41 Defined, Ambiguous, represent the sequence of the overhang.
42 NotDefined
43 NotDefined is for enzymes not characterised in
44 rebase.
45
46 Defined correspond to enzymes that display a
47 constant overhang whatever the sequence.
48 ex : EcoRI. G^AATTC -> overhang :AATT
49 CTTAA^G
50
51 Ambiguous : the overhang varies with the
52 sequence restricted.
53 Typically enzymes which cut outside their
54 restriction site or (but not always)
55 inside an ambiguous site.
56 ex:
57 AcuI CTGAAG(22/20) -> overhang : NN
58 AasI GACNNN^NNNGTC -> overhang : NN
59 CTGN^NNNNNCAG
60
note : these 3 classes refer to the overhang, not the site.
62 So the enzyme ApoI (RAATTY) is defined even if its restriction
63 site is ambiguous.
64
65 ApoI R^AATTY -> overhang : AATT -> Defined
66 YTTAA^R
67 Accordingly, blunt enzymes are always Defined even
68 when they cut outside their restriction site.
69 ----------------------------------------------------------------------------
70 Not_available, as found in rebase file emboss_r.NNN files.
71 Commercially_available
72 allow the selection of the enzymes according to
73 their suppliers to reduce the quantity
74 of results.
75 Also will allow the implementation of buffer
76 compatibility tables. Not implemented yet.
77
78 the list of suppliers is extracted from
79 emboss_s.NNN
80 ----------------------------------------------------------------------------
81 """
82
83 from __future__ import print_function
84 from Bio._py3k import zip
85 from Bio._py3k import filter
86 from Bio._py3k import range
87
88 import re
89 import itertools
90
91 from Bio.Seq import Seq, MutableSeq
92 from Bio.Alphabet import IUPAC
93
94 from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict
95 from Bio.Restriction.Restriction_Dictionary import typedict
96 from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict
97
98 from Bio.Restriction.RanaConfig import *
99 from Bio.Restriction.PrintFormat import PrintFormat
100
101 __docformat__ = "restructuredtext en"
108 """Check characters in a string (PRIVATE).
109
110 Remove digits and white space present in string. Allows any valid ambiguous
111 IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, lower case are converted).
112
113 Other characters (e.g. symbols) trigger a TypeError.
114
115 Returns the string WITH A LEADING SPACE (!). This is for backwards
116 compatibility, and may in part be explained by the fact that
117 Bio.Restriction doesn't use zero based counting.
118 """
119
120 seq_string = "".join(seq_string.split()).upper()
121
122 for c in "0123456789":
123 seq_string = seq_string.replace(c, "")
124
125 if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")):
126 raise TypeError("Invalid character found in %s" % repr(seq_string))
127 return " " + seq_string
128
129
# Lookup table keyed by a single-letter IUPAC DNA code; each value is a
# string of single-letter IUPAC DNA codes.
# NOTE(review): the values look like "every code whose set of bases overlaps
# the set of bases of the key" (e.g. 'A' maps to A plus every ambiguity code
# that includes A) -- confirm against the code that consumes this table.
matching = {'A': 'ARWMHVDN', 'C': 'CYSMHBVN', 'G': 'GRSKBVDN',
            'T': 'TYWKHBDN', 'R': 'ABDGHKMNSRWV', 'Y': 'CBDHKMNSTWVY',
            'W': 'ABDHKMNRTWVY', 'S': 'CBDGHKMNSRVY', 'M': 'ACBDHMNSRWVY',
            'K': 'BDGHKNSRTWVY', 'H': 'ACBDHKMNSRTWVY',
            'B': 'CBDGHKMNSRTWVY', 'V': 'ACBDGHKMNSRWVY',
            'D': 'ABDGHKMNSRTWVY', 'N': 'ACBDGHKMNSRTWVY'}

# Module-level alias: DNA is another name for Bio.Seq.Seq.
DNA = Seq
238
241 """RestrictionType. Type from which derives all enzyme classes.
242
243 Implement the operator methods."""
244
def __init__(cls, name='', bases=(), dct={}):
    """RE(name, bases, dct) -> RestrictionType instance.

    Initialise a newly created enzyme class.

    Not intended to be used in normal operation. The enzymes are
    instantiated when importing the module.

    Two things happen here:

    - the enzyme name is rejected if it contains a hyphen;
    - the ``compsite`` attribute of the class is replaced by the result of
      compiling it with ``re.compile``.

    Raises ValueError if the name contains "-" or if compiling ``compsite``
    fails for any reason.
    """
    hyphen_in_name = "-" in name
    if hyphen_in_name:
        message = "Problem with hyphen in %s as enzyme name" % repr(name)
        raise ValueError(message)
    try:
        cls.compsite = re.compile(cls.compsite)
    except Exception:
        # Whatever went wrong is reported as a ValueError that quotes the
        # pattern which could not be compiled.
        raise ValueError("Problem with regular expression, re.compiled(%s)"
                         % repr(cls.compsite))
263
275
277 """RE.__div__(other) -> list.
278
279 RE/other
280 returns RE.search(other)."""
281 return cls.search(other)
282
284 """RE.__rdiv__(other) -> list.
285
286 other/RE
287 returns RE.search(other)."""
288 return cls.search(other)
289
291 """RE.__truediv__(other) -> list.
292
293 RE/other
294 returns RE.search(other)."""
295 return cls.search(other)
296
298 """RE.__rtruediv__(other) -> list.
299
300 other/RE
301 returns RE.search(other)."""
302 return cls.search(other)
303
305 """RE.__floordiv__(other) -> list.
306
307 RE//other
308 returns RE.catalyse(other)."""
309 return cls.catalyse(other)
310
312 """RE.__rfloordiv__(other) -> list.
313
314 other//RE
315 returns RE.catalyse(other)."""
316 return cls.catalyse(other)
317
319 """RE.__str__() -> str.
320
321 return the name of the enzyme."""
322 return cls.__name__
323
325 """RE.__repr__() -> str.
326
327 used with eval or exec will instantiate the enzyme."""
328 return "%s" % cls.__name__
329
331 """RE.__len__() -> int.
332
333 length of the recognition site."""
334 return cls.size
335
337
338
339 return id(cls)
340
342 """RE == other -> bool
343
344 True if RE and other are the same enzyme.
345
346 Specifically this checks they are the same Python object.
347 """
348
349 return id(cls) == id(other)
350
352 """RE != other -> bool.
353 isoschizomer strict, same recognition site, same restriction -> False
354 all the other-> True
355
356 WARNING - This is not the inverse of the __eq__ method.
357 """
358 if not isinstance(other, RestrictionType):
359 return True
360 elif cls.charac == other.charac:
361 return False
362 else:
363 return True
364
366 """RE >> other -> bool.
367
368 neoschizomer : same recognition site, different restriction. -> True
369 all the others : -> False"""
370 if not isinstance(other, RestrictionType):
371 return False
372 elif cls.site == other.site and cls.charac != other.charac:
373 return True
374 else:
375 return False
376
378 """a % b -> bool.
379
380 Test compatibility of the overhang of a and b.
381 True if a and b have compatible overhang."""
382 if not isinstance(other, RestrictionType):
383 raise TypeError(
384 'expected RestrictionType, got %s instead' % type(other))
385 return cls._mod1(other)
386
388 """a >= b -> bool.
389
390 a is greater or equal than b if the a site is longer than b site.
391 if their site have the same length sort by alphabetical order of their
392 names."""
393 if not isinstance(other, RestrictionType):
394 raise NotImplementedError
395 if len(cls) > len(other):
396 return True
397 elif cls.size == len(other) and cls.__name__ >= other.__name__:
398 return True
399 else:
400 return False
401
403 """a > b -> bool.
404
405 sorting order:
406 1. size of the recognition site.
407 2. if equal size, alphabetical order of the names."""
408 if not isinstance(other, RestrictionType):
409 raise NotImplementedError
410 if len(cls) > len(other):
411 return True
412 elif cls.size == len(other) and cls.__name__ > other.__name__:
413 return True
414 else:
415 return False
416
418 """a <= b -> bool.
419
420 sorting order:
421 1. size of the recognition site.
422 2. if equal size, alphabetical order of the names."""
423 if not isinstance(other, RestrictionType):
424 raise NotImplementedError
425 elif len(cls) < len(other):
426 return True
427 elif len(cls) == len(other) and cls.__name__ <= other.__name__:
428 return True
429 else:
430 return False
431
433 """a < b -> bool.
434
435 sorting order:
436 1. size of the recognition site.
437 2. if equal size, alphabetical order of the names."""
438 if not isinstance(other, RestrictionType):
439 raise NotImplementedError
440 elif len(cls) < len(other):
441 return True
442 elif len(cls) == len(other) and cls.__name__ < other.__name__:
443 return True
444 else:
445 return False
446
449 """Implement the methods that are common to all restriction enzymes.
450
451 All the methods are classmethod.
452
453 For internal use only. Not meant to be instantiate."""
454
@classmethod
def search(cls, dna, linear=True):
    """RE.search(dna, linear=True) -> list.

    Return a list of all the sites of RE in dna.

    dna is either a FormattedSeq, which is used as it is (``linear`` is
    then not consulted), or a Bio.Seq.Seq / Bio.Seq.MutableSeq instance,
    which is wrapped in a FormattedSeq built with the ``linear`` flag.

    If linear is False, the restriction sites that span over the
    boundaries of the sequence will be included.

    The positions are the first base of the 3' fragment,
    i.e. the first base after the position the enzyme will cut.

    The prepared sequence is stored on the class as ``cls.dna`` before the
    actual search is delegated to ``cls._search()``.
    """
    prepared = dna if isinstance(dna, FormattedSeq) else FormattedSeq(dna, linear)
    cls.dna = prepared
    return cls._search()
482
483 @classmethod
485 """RE.all_suppliers -> print all the suppliers of R"""
486 supply = sorted(x[0] for x in suppliers_dict.values())
487 print(",\n".join(supply))
488 return
489
490 @classmethod
492 """RE.is_equischizomers(other) -> bool.
493
494 True if other is an isoschizomer of RE.
495 False else.
496
497 equischizomer <=> same site, same position of restriction."""
498 return not self != other
499
500 @classmethod
502 """RE.is_neoschizomers(other) -> bool.
503
504 True if other is an isoschizomer of RE.
505 False else.
506
507 neoschizomer <=> same site, different position of restriction."""
508 return self >> other
509
510 @classmethod
512 """RE.is_isoschizomers(other) -> bool.
513
514 True if other is an isoschizomer of RE.
515 False else.
516
517 isoschizomer <=> same site."""
518 return (not self != other) or self >> other
519
520 @classmethod
522 """RE.equischizomers([batch]) -> list.
523
524 return a tuple of all the isoschizomers of RE.
525 if batch is supplied it is used instead of the default AllEnzymes.
526
527 equischizomer <=> same site, same position of restriction."""
528 if not batch:
529 batch = AllEnzymes
530 r = [x for x in batch if not self != x]
531 i = r.index(self)
532 del r[i]
533 r.sort()
534 return r
535
536 @classmethod
538 """RE.neoschizomers([batch]) -> list.
539
540 return a tuple of all the neoschizomers of RE.
541 if batch is supplied it is used instead of the default AllEnzymes.
542
543 neoschizomer <=> same site, different position of restriction."""
544 if not batch:
545 batch = AllEnzymes
546 r = sorted(x for x in batch if self >> x)
547 return r
548
549 @classmethod
551 """RE.isoschizomers([batch]) -> list.
552
553 return a tuple of all the equischizomers and neoschizomers of RE.
554 if batch is supplied it is used instead of the default AllEnzymes."""
555 if not batch:
556 batch = AllEnzymes
557 r = [x for x in batch if (self >> x) or (not self != x)]
558 i = r.index(self)
559 del r[i]
560 r.sort()
561 return r
562
563 @classmethod
565 """RE.frequency() -> int.
566
567 frequency of the site."""
568 return self.freq
569
570
class NoCut(AbstractCut):
    """Implement the methods specific to the enzymes that do not cut.

    These enzymes are generally enzymes that have been only partially
    characterised and the way they cut the DNA is unknown, or enzymes for
    which the pattern of cut is too complex to be recorded in Rebase
    (ncuts values of 0 in emboss_e.###).

    When using search() with these enzymes the values returned are at the
    start of the restriction site.

    Their catalyse() method cannot produce fragments and raises an error.

    Unknown and NotDefined are also part of the base classes of these enzymes.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def cut_once(self):
        """RE.cut_once() -> bool.

        True if the enzyme cut the sequence one time on each strand.
        Always False for this class."""
        return False

    @classmethod
    def cut_twice(self):
        """RE.cut_twice() -> bool.

        True if the enzyme cut the sequence twice on each strand.
        Always False for this class."""
        return False

    @classmethod
    def _modify(self, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is an integer corresponding to the location of the match for
        the enzyme pattern in the sequence.

        For the enzymes that do cut, _modify converts the location of the
        match into the place where the enzyme cuts. Here the position of the
        cut is not known, so the location of the match is yielded unchanged.
        """
        yield location

    @classmethod
    def _rev_modify(self, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        as _modify for site situated on the antiparallel strand when the
        enzyme is not palindromic. The location is yielded unchanged.
        """
        yield location

    @classmethod
    def characteristic(self):
        """RE.characteristic() -> tuple.

        the tuple contains the attributes:
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        The four cut positions are always None for this class."""
        return None, None, None, None, self.site
652
653
class OneCut(AbstractCut):
    """Implement the methods specific to the enzymes that cut the DNA only once

    Correspond to ncuts values of 2 in emboss_e.###

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def cut_once(self):
        """RE.cut_once() -> bool.

        True if the enzyme cut the sequence one time on each strand.
        Always True for this class."""
        return True

    @classmethod
    def cut_twice(self):
        """RE.cut_twice() -> bool.

        True if the enzyme cut the sequence twice on each strand.
        Always False for this class."""
        return False

    @classmethod
    def _modify(self, location):
        """RE._modify(location) -> generator of int.

        for internal use only.

        location is an integer corresponding to the location of the match for
        the enzyme pattern in the sequence.
        _modify yields the real place where the enzyme will cut, that is
        location shifted by the fst5 attribute of the enzyme.

        example::

            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence:
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so:
            EcoRI._modify(10) -> 11.

        Enzymes of this class cut once, so a single integer is yielded.
        """
        yield location + self.fst5

    @classmethod
    def _rev_modify(self, location):
        """RE._rev_modify(location) -> generator of int.

        for internal use only.

        as _modify for site situated on the antiparallel strand when the
        enzyme is not palindromic. The single value yielded is location
        minus the fst3 attribute of the enzyme.
        """
        yield location - self.fst3

    @classmethod
    def characteristic(self):
        """RE.characteristic() -> tuple.

        the tuple contains the attributes:
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        The two second-cut positions are always None for this class."""
        return self.fst5, self.fst3, None, None, self.site
725
728 """Implement the methods specific to the enzymes that cut the DNA twice
729
730 Correspond to ncuts values of 4 in emboss_e.###
731
732 Internal use only. Not meant to be instantiated."""
733
734 @classmethod
736 """RE.cut_once() -> bool.
737
738 True if the enzyme cut the sequence one time on each strand."""
739 return False
740
741 @classmethod
743 """RE.cut_twice() -> bool.
744
745 True if the enzyme cut the sequence twice on each strand."""
746 return True
747
748 @classmethod
750 """RE._modify(location) -> int.
751
752 for internal use only.
753
754 location is an integer corresponding to the location of the match for
755 the enzyme pattern in the sequence.
756 _modify returns the real place where the enzyme will cut.
757
758 example::
759
760 EcoRI pattern : GAATTC
761 EcoRI will cut after the G.
762 so in the sequence:
763 ______
764 GAATACACGGAATTCGA
765 |
766 10
767 dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
768 EcoRI cut after the G so:
769 EcoRI._modify(10) -> 11.
770
771 if the enzyme cut twice _modify will returns two integer corresponding
772 to each cutting site.
773 """
774 yield location + self.fst5
775 yield location + self.scd5
776
777 @classmethod
779 """RE._rev_modify(location) -> generator of int.
780
781 for internal use only.
782
783 as _modify for site situated on the antiparallel strand when the
784 enzyme is not palindromic
785 """
786 yield location - self.fst3
787 yield location - self.scd3
788
789 @classmethod
791 """RE.characteristic() -> tuple.
792
793 the tuple contains the attributes:
794 fst5 -> first 5' cut ((current strand) or None
795 fst3 -> first 3' cut (complementary strand) or None
796 scd5 -> second 5' cut (current strand) or None
797 scd5 -> second 3' cut (complementary strand) or None
798 site -> recognition site."""
799 return self.fst5, self.fst3, self.scd5, self.scd3, self.site
800
803 """Implement the information about methylation.
804
805 Enzymes of this class possess a site which is methylable."""
806
807 @classmethod
809 """RE.is_methylable() -> bool.
810
811 True if the recognition site is a methylable."""
812 return True
813
816 """Implement information about methylation sensitibility.
817
818 Enzymes of this class are not sensible to methylation."""
819
820 @classmethod
822 """RE.is_methylable() -> bool.
823
824 True if the recognition site is a methylable."""
825 return False
826
829 """Implement the methods specific to the enzymes which are palindromic
830
831 palindromic means : the recognition site and its reverse complement are
832 identical.
833 Remarks : an enzyme with a site CGNNCG is palindromic even if some
834 of the sites that it will recognise are not.
835 for example here : CGAACG
836
837 Internal use only. Not meant to be instantiated."""
838
839 @classmethod
841 """RE._search() -> list.
842
843 for internal use only.
844
845 implement the search method for palindromic and non palindromic enzyme.
846 """
847 siteloc = self.dna.finditer(self.compsite, self.size)
848 self.results = [r for s, g in siteloc for r in self._modify(s)]
849 if self.results:
850 self._drop()
851 return self.results
852
853 @classmethod
855 """RE.is_palindromic() -> bool.
856
857 True if the recognition site is a palindrom."""
858 return True
859
862 """Implement the methods specific to the enzymes which are not palindromic
863
864 palindromic means : the recognition site and its reverse complement are
865 identical.
866
867 Internal use only. Not meant to be instantiated."""
868
869 @classmethod
871 """RE._search() -> list.
872
873 for internal use only.
874
875 implement the search method for palindromic and non palindromic enzyme.
876 """
877 iterator = self.dna.finditer(self.compsite, self.size)
878 self.results = []
879 modif = self._modify
880 revmodif = self._rev_modify
881 s = str(self)
882 self.on_minus = []
883 for start, group in iterator:
884 if group(s):
885 self.results += [r for r in modif(start)]
886 else:
887 self.on_minus += [r for r in revmodif(start)]
888 self.results += self.on_minus
889 if self.results:
890 self.results.sort()
891 self._drop()
892 return self.results
893
894 @classmethod
896 """RE.is_palindromic() -> bool.
897
898 True if the recognition site is a palindrom."""
899 return False
900
903 """Implement the methods specific to the enzymes for which the overhang
904 is unknown.
905
906 These enzymes are also NotDefined and NoCut.
907
908 Internal use only. Not meant to be instantiated."""
909
910 @classmethod
912 """RE.catalyse(dna, linear=True) -> tuple of DNA.
913 RE.catalyze(dna, linear=True) -> tuple of DNA.
914
915 return a tuple of dna as will be produced by using RE to restrict the
916 dna.
917
918 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
919
920 if linear is False, the sequence is considered to be circular and the
921 output will be modified accordingly."""
922 raise NotImplementedError('%s restriction is unknown.'
923 % self.__name__)
924 catalyze = catalyse
925
926 @classmethod
928 """RE.is_blunt() -> bool.
929
930 True if the enzyme produces blunt end.
931
932 see also:
933 RE.is_3overhang()
934 RE.is_5overhang()
935 RE.is_unknown()"""
936 return False
937
938 @classmethod
940 """RE.is_5overhang() -> bool.
941
942 True if the enzyme produces 5' overhang sticky end.
943
944 see also:
945 RE.is_3overhang()
946 RE.is_blunt()
947 RE.is_unknown()"""
948 return False
949
950 @classmethod
952 """RE.is_3overhang() -> bool.
953
954 True if the enzyme produces 3' overhang sticky end.
955
956 see also:
957 RE.is_5overhang()
958 RE.is_blunt()
959 RE.is_unknown()"""
960 return False
961
962 @classmethod
964 """RE.overhang() -> str. type of overhang of the enzyme.,
965
966 can be "3' overhang", "5' overhang", "blunt", "unknown" """
967 return 'unknown'
968
969 @classmethod
971 """RE.compatible_end() -> list.
972
973 list of all the enzymes that share compatible end with RE."""
974 return []
975
976 @classmethod
978 """RE._mod1(other) -> bool.
979
980 for internal use only
981
982 test for the compatibility of restriction ending of RE and other."""
983 return False
984
985
class Blunt(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is blunt.

    The enzyme cuts the + strand and the - strand of the DNA at the same
    place.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def catalyse(self, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly."""
        r = self.search(dna, linear)
        # search() leaves the prepared sequence on the class as self.dna.
        d = self.dna
        if not r:
            # no site found: the uncut sequence is the only fragment.
            return d[1:],
        # fragments lying between two consecutive cut positions
        # (empty when there is a single site).
        between = [d[start:end] for start, end in zip(r, r[1:])]
        if d.is_linear():
            # START of the sequence to FIRST site, the fragments in between,
            # then LAST site to END of the sequence.
            fragments = [d[1:r[0]]] + between + [d[r[-1]:]]
        else:
            # circular : bridge LAST site to FIRST site, then the others.
            fragments = [d[r[-1]:] + d[1:r[0]]] + between
        return tuple(fragments)
    catalyze = catalyse

    @classmethod
    def is_blunt(self):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return True

    @classmethod
    def is_5overhang(self):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_3overhang(self):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def overhang(self):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown" """
        return 'blunt'

    @classmethod
    def compatible_end(self, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes of batch that share compatible end
        with RE, i.e. all the blunt enzymes of batch.
        if batch is not supplied (or is empty) AllEnzymes is used."""
        if not batch:
            batch = AllEnzymes
        # iterate over the requested batch, not unconditionally AllEnzymes.
        return sorted(x for x in batch if x.is_blunt())

    @staticmethod
    def _mod1(other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        A blunt end is compatible with any other blunt end."""
        return issubclass(other, Blunt)
1105
1106
class Ov5(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 3'.

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def catalyse(self, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly."""
        r = self.search(dna, linear)
        # search() leaves the prepared sequence on the class as self.dna.
        d = self.dna
        if not r:
            # no site found: the uncut sequence is the only fragment.
            return d[1:],
        # fragments lying between two consecutive cut positions
        # (empty when there is a single site).
        between = [d[start:end] for start, end in zip(r, r[1:])]
        if d.is_linear():
            # START of the sequence to FIRST site, the fragments in between,
            # then LAST site to END of the sequence.
            fragments = [d[1:r[0]]] + between + [d[r[-1]:]]
        else:
            # circular : bridge LAST site to FIRST site, then the others.
            fragments = [d[r[-1]:] + d[1:r[0]]] + between
        return tuple(fragments)
    catalyze = catalyse

    @classmethod
    def is_blunt(self):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_5overhang(self):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return True

    @classmethod
    def is_3overhang(self):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def overhang(self):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown" """
        return "5' overhang"

    @classmethod
    def compatible_end(self, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes of batch that share compatible end
        with RE: the 5' overhang enzymes x of batch for which x % RE is true.
        if batch is not supplied (or is empty) AllEnzymes is used."""
        if not batch:
            batch = AllEnzymes
        # iterate over the requested batch, not unconditionally AllEnzymes.
        return sorted(x for x in batch if x.is_5overhang() and x % self)

    @classmethod
    def _mod1(self, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another 5' overhang enzyme can be compatible; the comparison of
        the overhangs themselves is delegated to _mod2."""
        if issubclass(other, Ov5):
            return self._mod2(other)
        else:
            return False
1228
1229
class Ov3(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 5'.

    The enzyme cuts the - strand after the + strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def catalyse(self, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly."""
        r = self.search(dna, linear)
        # search() leaves the prepared sequence on the class as self.dna.
        d = self.dna
        if not r:
            # no site found: the uncut sequence is the only fragment.
            return d[1:],
        # fragments lying between two consecutive cut positions
        # (empty when there is a single site).
        between = [d[start:end] for start, end in zip(r, r[1:])]
        if d.is_linear():
            # START of the sequence to FIRST site, the fragments in between,
            # then LAST site to END of the sequence.
            fragments = [d[1:r[0]]] + between + [d[r[-1]:]]
        else:
            # circular : bridge LAST site to FIRST site, then the others.
            fragments = [d[r[-1]:] + d[1:r[0]]] + between
        return tuple(fragments)
    catalyze = catalyse

    @classmethod
    def is_blunt(self):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_5overhang(self):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_3overhang(self):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return True

    @classmethod
    def overhang(self):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown" """
        return "3' overhang"

    @classmethod
    def compatible_end(self, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes of batch that share compatible end
        with RE: the 3' overhang enzymes x of batch for which x % RE is true.
        if batch is not supplied (or is empty) AllEnzymes is used."""
        if not batch:
            batch = AllEnzymes
        # iterate over the requested batch, not unconditionally AllEnzymes.
        return sorted(x for x in batch if x.is_3overhang() and x % self)

    @classmethod
    def _mod1(self, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another 3' overhang enzyme can be compatible; the comparison of
        the overhangs themselves is delegated to _mod2."""
        if issubclass(other, Ov3):
            return self._mod2(other)
        else:
            return False
1354
1357 """Implement the methods specific to the enzymes for which the overhang
1358 and the cut are not variable.
1359
1360 Typical example : EcoRI -> G^AATT_C
1361 The overhang will always be AATT
1362 Notes:
1363 Blunt enzymes are always defined. even if there site is GGATCCNNN^_N
1364 There overhang is always the same : blunt!
1365
1366 Internal use only. Not meant to be instantiated."""
1367
1368 @classmethod
1370 """RE._drop() -> list.
1371
1372 for internal use only.
1373
1374 drop the site that are situated outside the sequence in linear sequence.
1375 modify the index for site in circular sequences."""
1376
1377
1378
1379
1380
1381
1382
1383
1384 length = len(self.dna)
1385 drop = itertools.dropwhile
1386 take = itertools.takewhile
1387 if self.dna.is_linear():
1388 self.results = [x for x in drop(lambda x:x < 1, self.results)]
1389 self.results = [x for x in take(lambda x:x < length, self.results)]
1390 else:
1391 for index, location in enumerate(self.results):
1392 if location < 1:
1393 self.results[index] += length
1394 else:
1395 break
1396 for index, location in enumerate(self.results[::-1]):
1397 if location > length:
1398 self.results[-(index + 1)] -= length
1399 else:
1400 break
1401 return
1402
1403 @classmethod
1405 """RE.is_defined() -> bool.
1406
1407 True if the sequence recognised and cut is constant,
1408 i.e. the recognition site is not degenerated AND the enzyme cut inside
1409 the site.
1410
1411 see also:
1412 RE.is_ambiguous()
1413 RE.is_unknown()"""
1414 return True
1415
1416 @classmethod
1418 """RE.is_ambiguous() -> bool.
1419
1420 True if the sequence recognised and cut is ambiguous,
1421 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1422 the site.
1423
1424 see also:
1425 RE.is_defined()
1426 RE.is_unknown()"""
1427 return False
1428
1429 @classmethod
1431 """RE.is_unknown() -> bool.
1432
1433 True if the sequence is unknown,
1434 i.e. the recognition site has not been characterised yet.
1435
1436 see also:
1437 RE.is_defined()
1438 RE.is_ambiguous()"""
1439 return False
1440
1441 @classmethod
1443 """RE.elucidate() -> str
1444
1445 return a representation of the site with the cut on the (+) strand
1446 represented as '^' and the cut on the (-) strand as '_'.
1447 ie:
1448 >>> EcoRI.elucidate() # 5' overhang
1449 'G^AATT_C'
1450 >>> KpnI.elucidate() # 3' overhang
1451 'G_GTAC^C'
1452 >>> EcoRV.elucidate() # blunt
1453 'GAT^_ATC'
1454 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1455 '? GTATAC ?'
1456 >>>
1457 """
1458 f5 = self.fst5
1459 f3 = self.fst3
1460 site = self.site
1461 if self.cut_twice():
1462 re = 'cut twice, not yet implemented sorry.'
1463 elif self.is_5overhang():
1464 if f5 == f3 == 0:
1465 re = 'N^' + self.site + '_N'
1466 elif f3 == 0:
1467 re = site[:f5] + '^' + site[f5:] + '_N'
1468 else:
1469 re = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
1470 elif self.is_blunt():
1471 re = site[:f5] + '^_' + site[f5:]
1472 else:
1473 if f5 == f3 == 0:
1474 re = 'N_' + site + '^N'
1475 else:
1476 re = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
1477 return re
1478
1479 @classmethod
1480 - def _mod2(self, other):
1481 """RE._mod2(other) -> bool.
1482
1483 for internal use only
1484
1485 test for the compatibility of restriction ending of RE and other."""
1486
1487
1488
1489 if other.ovhgseq == self.ovhgseq:
1490 return True
1491 elif issubclass(other, Ambiguous):
1492 return other._mod2(self)
1493 else:
1494 return False
1495
1498 """Implement the methods specific to the enzymes for which the overhang
1499 is variable.
1500
1501 Typical example : BstXI -> CCAN_NNNN^NTGG
1502 The overhang can be any sequence of 4 bases.
1503 Notes:
1504 Blunt enzymes are always defined, even if their site is GGATCCNNN^_N.
1505 Their overhang is always the same: blunt!
1506
1507 Internal use only. Not meant to be instantiated."""
1508
1509 @classmethod
1535
1536 @classmethod
1538 """RE.is_defined() -> bool.
1539
1540 True if the sequence recognised and cut is constant,
1541 i.e. the recognition site is not degenerated AND the enzyme cut inside
1542 the site.
1543
1544 see also:
1545 RE.is_ambiguous()
1546 RE.is_unknown()"""
1547 return False
1548
1549 @classmethod
1551 """RE.is_ambiguous() -> bool.
1552
1553 True if the sequence recognised and cut is ambiguous,
1554 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1555 the site.
1556
1557 see also:
1558 RE.is_defined()
1559 RE.is_unknown()"""
1560 return True
1561
1562 @classmethod
1564 """RE.is_unknown() -> bool.
1565
1566 True if the sequence is unknown,
1567 i.e. the recognition site has not been characterised yet.
1568
1569 see also:
1570 RE.is_defined()
1571 RE.is_ambiguous()"""
1572 return False
1573
1574 @classmethod
1575 - def _mod2(self, other):
1576 """RE._mod2(other) -> bool.
1577
1578 for internal use only
1579
1580 test for the compatibility of restriction ending of RE and other."""
1581
1582
1583
1584 if len(self.ovhgseq) != len(other.ovhgseq):
1585 return False
1586 else:
1587 se = self.ovhgseq
1588 for base in se:
1589 if base in 'ATCG':
1590 pass
1591 if base in 'N':
1592 se = '.'.join(se.split('N'))
1593 if base in 'RYWMSKHDBV':
1594 expand = '[' + matching[base] + ']'
1595 se = expand.join(se.split(base))
1596 if re.match(se, other.ovhgseq):
1597 return True
1598 else:
1599 return False
1600
1601 @classmethod
1603 """RE.elucidate() -> str
1604
1605 return a representation of the site with the cut on the (+) strand
1606 represented as '^' and the cut on the (-) strand as '_'.
1607 ie:
1608 >>> EcoRI.elucidate() # 5' overhang
1609 'G^AATT_C'
1610 >>> KpnI.elucidate() # 3' overhang
1611 'G_GTAC^C'
1612 >>> EcoRV.elucidate() # blunt
1613 'GAT^_ATC'
1614 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1615 '? GTATAC ?'
1616 >>>
1617 """
1618 f5 = self.fst5
1619 f3 = self.fst3
1620 length = len(self)
1621 site = self.site
1622 if self.cut_twice():
1623 re = 'cut twice, not yet implemented sorry.'
1624 elif self.is_5overhang():
1625 if f3 == f5 == 0:
1626 re = 'N^' + site + '_N'
1627 elif 0 <= f5 <= length and 0 <= f3 + length <= length:
1628 re = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
1629 elif 0 <= f5 <= length:
1630 re = site[:f5] + '^' + site[f5:] + f3 * 'N' + '_N'
1631 elif 0 <= f3 + length <= length:
1632 re = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
1633 elif f3 + length < 0:
1634 re = 'N^' * abs(f5) * 'N' + '_' + abs(length + f3) * 'N' + site
1635 elif f5 > length:
1636 re = site + (f5 - length) * 'N' + '^' + (length + f3 - f5) * 'N' + '_N'
1637 else:
1638 re = 'N^' + abs(f5) * 'N' + site + f3 * 'N' + '_N'
1639 elif self.is_blunt():
1640 if f5 < 0:
1641 re = 'N^_' + abs(f5) * 'N' + site
1642 elif f5 > length:
1643 re = site + (f5 - length) * 'N' + '^_N'
1644 else:
1645 raise ValueError('%s.easyrepr() : error f5=%i'
1646 % (self.name, f5))
1647 else:
1648 if f3 == 0:
1649 if f5 == 0:
1650 re = 'N_' + site + '^N'
1651 else:
1652 re = site + '_' + (f5 - length) * 'N' + '^N'
1653 elif 0 < f3 + length <= length and 0 <= f5 <= length:
1654 re = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
1655 elif 0 < f3 + length <= length:
1656 re = site[:f3] + '_' + site[f3:] + (f5 - length) * 'N' + '^N'
1657 elif 0 <= f5 <= length:
1658 re = 'N_' + 'N' * (f3 + length) + site[:f5] + '^' + site[f5:]
1659 elif f3 > 0:
1660 re = site + f3 * 'N' + '_' + (f5 - f3 - length) * 'N' + '^N'
1661 elif f5 < 0:
1662 re = 'N_' + abs(f3 - f5 + length) * 'N' + '^' + abs(f5) * 'N' + site
1663 else:
1664 re = 'N_' + abs(f3 + length) * 'N' + site + (f5 - length) * 'N' + '^N'
1665 return re
1666
1669 """Implement the methods specific to the enzymes for which the overhang
1670 is not characterised.
1671
1672 Correspond to NoCut and Unknown.
1673
1674 Internal use only. Not meant to be instantiated."""
1675
1676 @classmethod
1699
1700 @classmethod
1702 """RE.is_defined() -> bool.
1703
1704 True if the sequence recognised and cut is constant,
1705 i.e. the recognition site is not degenerated AND the enzyme cut inside
1706 the site.
1707
1708 see also:
1709 RE.is_ambiguous()
1710 RE.is_unknown()"""
1711 return False
1712
1713 @classmethod
1715 """RE.is_ambiguous() -> bool.
1716
1717 True if the sequence recognised and cut is ambiguous,
1718 i.e. the recognition site is degenerated AND/OR the enzyme cut outside
1719 the site.
1720
1721 see also:
1722 RE.is_defined()
1723 RE.is_unknown()"""
1724 return False
1725
1726 @classmethod
1728 """RE.is_unknown() -> bool.
1729
1730 True if the sequence is unknown,
1731 i.e. the recognition site has not been characterised yet.
1732
1733 see also:
1734 RE.is_defined()
1735 RE.is_ambiguous()"""
1736 return True
1737
1738 @classmethod
1739 - def _mod2(self, other):
1740 """RE._mod2(other) -> bool.
1741
1742 for internal use only
1743
1744 test for the compatibility of restriction ending of RE and other."""
1745
1746
1747
1748
1749
1750
1751 raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!"
1752 % (str(self), str(other), str(self)))
1753
1754 @classmethod
1756 """RE.elucidate() -> str
1757
1758 return a representation of the site with the cut on the (+) strand
1759 represented as '^' and the cut on the (-) strand as '_'.
1760 ie:
1761 >>> EcoRI.elucidate() # 5' overhang
1762 'G^AATT_C'
1763 >>> KpnI.elucidate() # 3' overhang
1764 'G_GTAC^C'
1765 >>> EcoRV.elucidate() # blunt
1766 'GAT^_ATC'
1767 >>> SnaI.elucidate() # NotDefined, cut profile unknown.
1768 '? GTATAC ?'
1769 >>>
1770 """
1771 return '? %s ?' % self.site
1772
1775
1776
1777
1778
1779 """Implement the methods specific to the enzymes which are commercially
1780 available.
1781
1782 Internal use only. Not meant to be instantiated."""
1783
1784 @classmethod
1786 """RE.suppliers() -> print the suppliers of RE."""
1787 for s in self.suppliers_dict():
1788 print(s + ',')
1789 return
1790
1791 @classmethod
1793 """RE.supplier_list() -> list.
1794
1795 list of the supplier names for RE."""
1796 return [v[0] for k, v in suppliers_dict.items() if k in self.suppl]
1797
1798 @classmethod
1800 """RE.buffers(supplier) -> string.
1801
1802 not implemented yet."""
1803 return
1804
1805 @classmethod
1807 """RE.iscomm() -> bool.
1808
1809 True if RE has suppliers."""
1810 return True
1811
1814 """Implement the methods specific to the enzymes which are not commercially
1815 available.
1816
1817 Internal use only. Not meant to be instantiated."""
1818
1819 @staticmethod
1821 """RE.suppliers() -> print the suppliers of RE."""
1822 return None
1823
1824 @classmethod
1826 """RE.supplier_list() -> list.
1827
1828 list of the supplier names for RE."""
1829 return []
1830
1831 @classmethod
1833 """RE.buffers(supplier) -> string.
1834
1835 not implemented yet."""
1836 raise TypeError("Enzyme not commercially available.")
1837
1838 @classmethod
1840 """RE.iscomm() -> bool.
1841
1842 True if RE has suppliers."""
1843 return False
1844
1854
1855 - def __init__(self, first=[], suppliers=[]):
1856 """RestrictionBatch([sequence]) -> new RestrictionBatch."""
1857 first = [self.format(x) for x in first]
1858 first += [eval(x) for n in suppliers for x in suppliers_dict[n][1]]
1859 set.__init__(self, first)
1860 self.mapping = dict.fromkeys(self)
1861 self.already_mapped = None
1862
1864 if len(self) < 5:
1865 return '+'.join(self.elements())
1866 else:
1867 return '...'.join(('+'.join(self.elements()[:2]),
1868 '+'.join(self.elements()[-2:])))
1869
1871 return 'RestrictionBatch(%s)' % self.elements()
1872
1879
1882
1885
def get(self, enzyme, add=False):
    """B.get(enzyme[, add]) -> enzyme class.

    enzyme is first normalised with B.format().  If the result is a
    member of B it is returned.  If it is not a member and add is true,
    it is added to B and returned.  If it is not a member and add is
    false (the default), a ValueError is raised.
    Whatever B.format() raises for an unusable argument propagates."""
    candidate = self.format(enzyme)
    if candidate not in self:
        if not add:
            raise ValueError('enzyme %s is not in RestrictionBatch'
                             % candidate.__name__)
        self.add(candidate)
    return candidate
1902
1904 """B.lambdasplit(func) -> RestrictionBatch .
1905
1906 the new batch will contains only the enzymes for which
1907 func return True."""
1908 d = [x for x in filter(func, self)]
1909 new = RestrictionBatch()
1910 new._data = dict(zip(d, [True] * len(d)))
1911 return new
1912
1914 """B.add_supplier(letter) -> add a new set of enzyme to B.
1915
1916 letter represents the suppliers as defined in the dictionary
1917 RestrictionDictionary.suppliers
1918 return None.
1919 raise a KeyError if letter is not a supplier code."""
1920 supplier = suppliers_dict[letter]
1921 self.suppliers.append(letter)
1922 for x in supplier[1]:
1923 self.add_nocheck(eval(x))
1924 return
1925
1927 """B.current_suppliers() -> add a new set of enzyme to B.
1928
1929 return a sorted list of the suppliers which have been used to
1930 create the batch."""
1931 suppl_list = sorted(suppliers_dict[x][0] for x in self.suppliers)
1932 return suppl_list
1933
1935 """ b += other -> add other to b, check the type of other."""
1936 self.add(other)
1937 return self
1938
1940 """ b + other -> new RestrictionBatch."""
1941 new = self.__class__(self)
1942 new.add(other)
1943 return new
1944
1946 """B.remove(other) -> remove other from B if other is a RestrictionType.
1947
1948 Safe set.remove method. Verify that other is a RestrictionType or can be
1949 evaluated to a RestrictionType.
1950 raise a ValueError if other can not be evaluated to a RestrictionType.
1951 raise a KeyError if other is not in B."""
1952 return set.remove(self, self.format(other))
1953
def add(self, other):
    """B.add(other) -> add other to B if other is a RestrictionType.

    Checked counterpart of set.add: other is passed through B.format()
    first and the formatted value is what gets stored.  Any exception
    raised by B.format() propagates to the caller.
    """
    checked = self.format(other)
    return set.add(self, checked)
1962
1964 """B.add_nocheck(other) -> add other to B. don't check type of other.
1965 """
1966 return set.add(self, other)
1967
1984
1986 """B.is_restriction(y) -> bool.
1987
1988 True is y or eval(y) is a RestrictionType."""
1989 return isinstance(y, RestrictionType) or \
1990 isinstance(eval(str(y)), RestrictionType)
1991
def split(self, *classes, **bool):
    """B.split(class, [class.__name__ = True]) -> new RestrictionBatch.

    Return a new batch holding the enzymes of B that satisfy every
    class given in classes.  By default an enzyme must be a subclass of
    each class; passing <class name>=False as keyword inverts the
    requirement for that class (the enzyme must NOT be a subclass).

    it works but it is slow, so it has really an interest when splitting
    over multiple conditions.

    The selected enzymes are stored with set.update: the batch is a
    builtin set, so filling a '_data' attribute (as was done previously)
    left the returned batch empty."""
    # NOTE: the keyword dictionary is called 'bool' and hides the builtin
    # inside this method; the name is kept so the signature is unchanged.
    def splittest(element):
        for klass in classes:
            wanted = bool.get(klass.__name__, True)
            # Reject when membership is the opposite of what is wanted.
            if issubclass(element, klass) == (not wanted):
                return False
        return True
    new = RestrictionBatch()
    set.update(new, filter(splittest, self))
    # Keep the per-enzyme result table in step with the content, the
    # same way __init__ builds it.
    new.mapping = dict.fromkeys(new)
    return new
2014
2016 """B.elements() -> tuple.
2017
2018 give all the names of the enzymes in B sorted alphabetically."""
2019 l = sorted(str(e) for e in self)
2020 return l
2021
2023 """B.as_string() -> list.
2024
2025 return a list of the name of the elements of B."""
2026 return [str(e) for e in self]
2027
2028 @classmethod
2030 """B.suppl_codes() -> dict
2031
2032 letter code for the suppliers"""
2033 supply = dict((k, v[0]) for k, v in suppliers_dict.items())
2034 return supply
2035
2036 @classmethod
2038 """B.show_codes() -> letter codes for the suppliers"""
2039 supply = [' = '.join(i) for i in self.suppl_codes().items()]
2040 print('\n'.join(supply))
2041 return
2042
def search(self, dna, linear=True):
    """B.search(dna) -> dict.

    Run the search of every enzyme of B against dna and return the
    dictionary {enzyme: result of enzyme.search}.

    dna is either a DNA instance, which gets wrapped as
    FormattedSeq(dna, linear), or an already built FormattedSeq, in
    which case its own 'linear' attribute is used and the linear
    argument is ignored.  Anything else raises a TypeError.

    The pair (str(dna), linear flag) of the last search is kept in
    self.already_mapped; asking again for the same pair returns the
    stored self.mapping without searching."""
    if not hasattr(self, "already_mapped"):
        self.already_mapped = None
    if isinstance(dna, DNA):
        needs_wrapping = True
        fingerprint = (str(dna), linear)
    elif isinstance(dna, FormattedSeq):
        needs_wrapping = False
        fingerprint = (str(dna), dna.linear)
    else:
        raise TypeError("Expected Seq or MutableSeq instance, got %s instead"
                        % type(dna))
    if fingerprint == self.already_mapped:
        return self.mapping
    # Record the fingerprint before searching, as the cache key.
    self.already_mapped = fingerprint
    target = FormattedSeq(dna, linear) if needs_wrapping else dna
    self.mapping = dict((enz, enz.search(target)) for enz in self)
    return self.mapping
2074
2075
2076
2077
2078
2079
2080
2081
2082 -class Analysis(RestrictionBatch, PrintFormat):
2083
2086 """Analysis([restrictionbatch [, sequence] linear=True]) -> New Analysis class.
2087
2088 For most of the method of this class if a dictionary is given it will
2089 be used as the base to calculate the results.
2090 If no dictionary is given a new analysis using the Restriction Batch
2091 which has been given when the Analysis class has been instantiated."""
2092 RestrictionBatch.__init__(self, restrictionbatch)
2093 self.rb = restrictionbatch
2094 self.sequence = sequence
2095 self.linear = linear
2096 if self.sequence:
2097 self.search(self.sequence, self.linear)
2098
2100 return 'Analysis(%s,%s,%s)' %\
2101 (repr(self.rb), repr(self.sequence), self.linear)
2102
2104 """A._sub_set(other_set) -> dict.
2105
2106 Internal use only.
2107
2108 screen the results through wanted set.
2109 Keep only the results for which the enzymes is in wanted set.
2110 """
2111 return dict((k, v) for k, v in self.mapping.items() if k in wanted)
2112
2114 """A._boundaries(start, end) -> tuple.
2115
2116 Format the boundaries for use with the methods that limit the
2117 search to only part of the sequence given to analyse.
2118 """
2119 if not isinstance(start, int):
2120 raise TypeError('expected int, got %s instead' % type(start))
2121 if not isinstance(end, int):
2122 raise TypeError('expected int, got %s instead' % type(end))
2123 if start < 1:
2124 start += len(self.sequence)
2125 if end < 1:
2126 end += len(self.sequence)
2127 if start < end:
2128 pass
2129 else:
2130 start, end == end, start
2131 if start < 1:
2132 start == 1
2133 if start < end:
2134 return start, end, self._test_normal
2135 else:
2136 return start, end, self._test_reverse
2137
2139 """A._test_normal(start, end, site) -> bool.
2140
2141 Internal use only
2142 Test if site is in between start and end.
2143 """
2144 return start <= site < end
2145
2147 """A._test_reverse(start, end, site) -> bool.
2148
2149 Internal use only
2150 Test if site is in between end and start (for circular sequences).
2151 """
2152 return start <= site <= len(self.sequence) or 1 <= site < end
2153
def print_that(self, dct=None, title='', s1=''):
    """A.print_that([dct[, title[, s1]]]) -> print the results from dct.

    When dct is missing or empty the full mapping of the analysis is
    used instead.  An empty line is printed first, then the work is
    handed to PrintFormat.print_that, whose return value is returned.
    """
    results = dct if dct else self.mapping
    print("")
    return PrintFormat.print_that(self, results, title, s1)
2163
2165 """A.change(**attribute_name) -> Change attribute of Analysis.
2166
2167 It is possible to change the width of the shell by setting
2168 self.ConsoleWidth to what you want.
2169 self.NameWidth refer to the maximal length of the enzyme name.
2170
2171 Changing one of these parameters here might not give the results
2172 you expect. In which case, you can settle back to a 80 columns shell
2173 or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
2174 you get it right."""
2175 for k, v in what.items():
2176 if k in ('NameWidth', 'ConsoleWidth'):
2177 setattr(self, k, v)
2178 self.Cmodulo = self.ConsoleWidth % self.NameWidth
2179 self.PrefWidth = self.ConsoleWidth - self.Cmodulo
2180 elif k is 'sequence':
2181 setattr(self, 'sequence', v)
2182 self.search(self.sequence, self.linear)
2183 elif k is 'rb':
2184 self = Analysis.__init__(self, v, self.sequence, self.linear)
2185 elif k is 'linear':
2186 setattr(self, 'linear', v)
2187 self.search(self.sequence, v)
2188 elif k in ('Indent', 'Maxsize'):
2189 setattr(self, k, v)
2190 elif k in ('Cmodulo', 'PrefWidth'):
2191 raise AttributeError(
2192 'To change %s, change NameWidth and/or ConsoleWidth'
2193 % name)
2194 else:
2195 raise AttributeError(
2196 'Analysis has no attribute %s' % name)
2197 return
2198
def full(self, linear=True):
    """A.full() -> dict.

    Full restriction map of the sequence: the mapping currently stored
    on the analysis, returned as is (not a copy).

    The linear argument is accepted but not used by this method."""
    whole_map = self.mapping
    return whole_map
2204
def blunt(self, dct=None):
    """A.blunt([dct]) -> dict.

    Only the enzymes which produce blunt ends, i.e. those for which
    is_blunt() is true.

    dct is the result dictionary to filter.  When it is omitted (None)
    the full mapping of the analysis is used.  An empty dictionary is
    filtered as given and yields an empty result, so that chaining this
    filter after one that selected nothing does not silently fall back
    to the whole mapping."""
    if dct is None:
        dct = self.mapping
    return dict((enz, sites) for enz, sites in dct.items() if enz.is_blunt())
2212
2214 """A.overhang5([dct]) -> dict.
2215
2216 Only the enzymes which have a 5' overhang restriction site."""
2217 if not dct:
2218 dct = self.mapping
2219 return dict((k, v) for k, v in dct.items() if k.is_5overhang())
2220
2222 """A.Overhang3([dct]) -> dict.
2223
2224 Only the enzymes which have a 3'overhang restriction site."""
2225 if not dct:
2226 dct = self.mapping
2227 return dict((k, v) for k, v in dct.items() if k.is_3overhang())
2228
2230 """A.defined([dct]) -> dict.
2231
2232 Only the enzymes that have a defined restriction site in Rebase."""
2233 if not dct:
2234 dct = self.mapping
2235 return dict((k, v) for k, v in dct.items() if k.is_defined())
2236
2238 """A.with_sites([dct]) -> dict.
2239
2240 Enzymes which have at least one site in the sequence."""
2241 if not dct:
2242 dct = self.mapping
2243 return dict((k, v) for k, v in dct.items() if v)
2244
2246 """A.without_site([dct]) -> dict.
2247
2248 Enzymes which have no site in the sequence."""
2249 if not dct:
2250 dct = self.mapping
2251 return dict((k, v) for k, v in dct.items() if not v)
2252
2254 """A.With_N_Sites(N [, dct]) -> dict.
2255
2256 Enzymes which cut N times the sequence."""
2257 if not dct:
2258 dct = self.mapping
2259 return dict((k, v) for k, v in dct.items()if len(v) == N)
2260
2262 if not dct:
2263 dct = self.mapping
2264 return dict((k, v) for k, v in dct.items() if len(v) in list)
2265
2267 """A.with_name(list_of_names [, dct]) ->
2268
2269 Limit the search to the enzymes named in list_of_names."""
2270 for i, enzyme in enumerate(names):
2271 if enzyme not in AllEnzymes:
2272 print("no data for the enzyme: %s" % name)
2273 del names[i]
2274 if not dct:
2275 return RestrictionBatch(names).search(self.sequence)
2276 return dict((n, dct[n]) for n in names if n in dct)
2277
2279 """A.with_site_size(site_size [, dct]) ->
2280
2281 Limit the search to the enzymes whose site is of size <site_size>."""
2282 sites = [name for name in self if name.size == site_size]
2283 if not dct:
2284 return RestrictionBatch(sites).search(self.sequence)
2285 return dict((k, v) for k, v in dct.items() if k in site_size)
2286
2288 """A.only_between(start, end[, dct]) -> dict.
2289
2290 Enzymes that cut the sequence only in between start and end."""
2291 start, end, test = self._boundaries(start, end)
2292 if not dct:
2293 dct = self.mapping
2294 d = dict(dct)
2295 for key, sites in dct.items():
2296 if not sites:
2297 del d[key]
2298 continue
2299 for site in sites:
2300 if test(start, end, site):
2301 continue
2302 else:
2303 del d[key]
2304 break
2305 return d
2306
def between(self, start, end, dct=None):
    """A.between(start, end [, dct]) -> dict.

    Enzymes that cut the sequence at least in between start and end.
    They may cut outside as well.

    start and end are normalised by A._boundaries(), which also supplies
    the test deciding whether a site lies in the region.  dct is the
    result dictionary to filter; when omitted (None) the full mapping is
    used, while an empty dictionary gives an empty result.  Enzymes
    without any site are never selected."""
    start, end, test = self._boundaries(start, end)
    if dct is None:
        dct = self.mapping
    return dict((key, sites) for key, sites in dct.items()
                if any(test(start, end, site) for site in sites))
2323
2325 """A.show_only_between(start, end [, dct]) -> dict.
2326
2327 Enzymes that cut the sequence outside of the region
2328 in between start and end but do not cut inside."""
2329 d = []
2330 if start <= end:
2331 d = [(k, [vv for vv in v if start <= vv <= end])
2332 for v in self.between(start, end, dct)]
2333 else:
2334 d = [(k, [vv for vv in v if start <= vv or vv <= end])
2335 for v in self.between(start, end, dct)]
2336 return dict(d)
2337
2339 """A.only_outside(start, end [, dct]) -> dict.
2340
2341 Enzymes that cut the sequence outside of the region
2342 in between start and end but do not cut inside."""
2343 start, end, test = self._boundaries(start, end)
2344 if not dct:
2345 dct = self.mapping
2346 d = dict(dct)
2347 for key, sites in dct.items():
2348 if not sites:
2349 del d[key]
2350 continue
2351 for site in sites:
2352 if test(start, end, site):
2353 del d[key]
2354 break
2355 else:
2356 continue
2357 return d
2358
def outside(self, start, end, dct=None):
    """A.outside((start, end [, dct]) -> dict.

    Enzymes that cut outside the region in between start and end.
    No test is made to know if they cut or not inside this region.

    start and end are normalised by A._boundaries(), which also supplies
    the test deciding whether a site lies in the region.  dct is the
    result dictionary to filter; when omitted (None) the full mapping is
    used, while an empty dictionary gives an empty result.  Enzymes
    without any site are never selected."""
    start, end, test = self._boundaries(start, end)
    if dct is None:
        dct = self.mapping
    return dict((key, sites) for key, sites in dct.items()
                if any(not test(start, end, site) for site in sites))
2376
2378 """A.do_not_cut(start, end [, dct]) -> dict.
2379
2380 Enzymes that do not cut the region in between start and end."""
2381 if not dct:
2382 dct = self.mapping
2383 d = self.without_site()
2384 d.update(self.only_outside(start, end, dct))
2385 return d
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
# Build every enzyme described in typedict / enzymedict and sort the
# resulting objects into two batches according to newenz.is_comm().
CommOnly = RestrictionBatch()  # enzymes for which is_comm() is true
NonComm = RestrictionBatch()  # all the other enzymes
for TYPE, (bases, enzymes) in typedict.items():
    # 'bases' holds names that are resolved to objects of this module
    # with eval() -- presumably the behaviour classes defined earlier in
    # the file; verify against typedict.
    # NOTE(review): eval() must only ever see this package-provided data.
    bases = tuple(eval(x) for x in bases)
    # One intermediate type per typedict entry, created directly through
    # type.__new__ with RestrictionType as its metaclass.
    T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
    for k in enzymes:
        # k is the enzyme name, enzymedict[k] its attribute dictionary.
        # NOTE(review): what calling T does depends on RestrictionType,
        # which is defined outside this section.
        newenz = T(k, bases, enzymedict[k])
        # add_nocheck: no format/type check is done on these objects.
        if newenz.is_comm():
            CommOnly.add_nocheck(newenz)
        else:
            NonComm.add_nocheck(newenz)

# Union of both batches.
AllEnzymes = CommOnly | NonComm

# Names of all the enzymes, used for the namespace update and __all__.
names = [str(x) for x in AllEnzymes]
try:
    # 'x' only exists here on interpreters where the variable of a list
    # comprehension leaks into the enclosing scope (Python 2).
    del x
except NameError:
    # Nothing leaked, nothing to clean up.
    pass
# Bind every enzyme under its own name (assuming this runs at module
# level, where locals() is the module namespace).
locals().update(dict(zip(names, AllEnzymes)))
__all__ = ['FormattedSeq', 'Analysis', 'RestrictionBatch', 'AllEnzymes', 'CommOnly', 'NonComm'] + names
# Remove the loop variables and temporaries from the namespace.
del k, enzymes, TYPE, bases, names
2467